<<<<<<< HEAD ======= >>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046 <<<<<<< HEAD ======= >>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046
# Accession IDs of the GEO series under evaluation; each one has a matching
# series-matrix file stored under Data/.
geo_name_list <- c("GSE129166", "GSE34748", "GSE51675", "GSE15296", "GSE46474", "GSE50084")

# Load every series matrix from disk, deriving each path from its accession ID
# so the ID list above is the single source of truth. The result is a list of
# the objects returned by getGEO(), in the same order as geo_name_list, and
# named by accession ID so entries can be looked up by name or by position.
geo_list <- lapply(
  setNames(geo_name_list, geo_name_list),
  function(geo_name) {
    getGEO(filename = file.path("Data", paste0(geo_name, "_series_matrix.txt.gz")))
  }
)
<<<<<<< HEAD
Using locally cached version of GPL570 found here:
/var/folders/81/4fc96fh52150fkrp33z0kklh0000gp/T//RtmpZiAUT6/GPL570.soft.gz 
Using locally cached version of GPL570 found here:
/var/folders/81/4fc96fh52150fkrp33z0kklh0000gp/T//RtmpZiAUT6/GPL570.soft.gz 
Using locally cached version of GPL570 found here:
/var/folders/81/4fc96fh52150fkrp33z0kklh0000gp/T//RtmpZiAUT6/GPL570.soft.gz 
=======
Using locally cached version of GPL570 found here:
C:\Users\lmcca\AppData\Local\Temp\RtmpYzuDYu/GPL570.soft.gz 
Using locally cached version of GPL570 found here:
C:\Users\lmcca\AppData\Local\Temp\RtmpYzuDYu/GPL570.soft.gz 
Using locally cached version of GPL570 found here:
C:\Users\lmcca\AppData\Local\Temp\RtmpYzuDYu/GPL570.soft.gz 
>>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046
# For each loaded dataset, print its feature-data table (fData) and then the
# names of the feature-data columns that contain at least one NA.
for (geo in geo_list) {
  feature_tbl <- fData(geo)
  print(feature_tbl)
  na_counts <- colSums(is.na(feature_tbl))
  print(names(which(na_counts > 0)))
}
<<<<<<< HEAD ======= >>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046
character(0)
<<<<<<< HEAD ======= >>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046
character(0)
<<<<<<< HEAD ======= >>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046
[1] "GENE"    "TIGR_ID"
<<<<<<< HEAD ======= >>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046
character(0)
<<<<<<< HEAD ======= >>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046
character(0)
<<<<<<< HEAD ======= >>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046
character(0)

All GSE datasets have a gene symbol column that can be used to match records, with two exceptions: GSE50084 (the last one), which has the gene as the 2nd listed element under gene_assignment, and GSE51675, which is missing all of its gene info.

# For each loaded dataset, print its phenotype-data table (pData) and then the
# names of the phenotype-data columns that contain at least one NA.
for (geo in geo_list) {
  pheno_tbl <- pData(geo)
  print(pheno_tbl)
  na_counts <- colSums(is.na(pheno_tbl))
  print(names(which(na_counts > 0)))
}
<<<<<<< HEAD
character(0)
<<<<<<< HEAD
character(0)
<<<<<<< HEAD
character(0)
<<<<<<< HEAD ======= >>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046
character(0)
<<<<<<< HEAD
character(0)
<<<<<<< HEAD
character(0)
# For each loaded dataset, print the transposed expression matrix as a data
# frame (so the rows of exprs(geo) become columns) and then the names of the
# columns that contain at least one NA.
for (geo in geo_list) {
  # Build the transposed data frame once and reuse it for both prints.
  expr_tbl <- data.frame(t(exprs(geo)))
  print(expr_tbl)
  na_counts <- colSums(is.na(expr_tbl))
  print(names(which(na_counts > 0)))
}
<<<<<<< HEAD
character(0)
<<<<<<< HEAD ======= >>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046
character(0)
<<<<<<< HEAD
character(0)
<<<<<<< HEAD
character(0)
<<<<<<< HEAD
character(0)
<<<<<<< HEAD ======= >>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046
character(0)
# Draw one boxplot figure per dataset covering the first 100 columns of the
# transposed expression matrix, as a quick look at the value ranges.
for (geo in geo_list) {
  df <- data.frame(t(exprs(geo)))
  boxplot(df[seq_len(100)])
}

The plots for GSE51675 and GSE15296 show that their values have been transformed somehow. The other 4 datasets seem fairly contained between 0 and 14 expression intensity.

GSE51675 has been discarded due to its small sample size, missing info, and transformed expression values. GSE50084 has also been discarded.

# Contains "GSE129166", "GSE34748"
# Count the distinct gene symbols found across the two datasets. `[[` pulls
# the "Gene Symbol" column out as a plain vector. With `[` the operands were
# one-column data frames, and union() on data frames does not behave the same
# under every union() implementation that can be in scope (the same line has
# been recorded printing both 23521 and 54675), so the count was not
# reproducible. On plain vectors union() is unique(c(x, y)).
length(union(fData(geo_list[[1]])[["Gene Symbol"]], fData(geo_list[[2]])[["Gene Symbol"]]))
<<<<<<< HEAD
[1] 23521
=======
[1] 54675
>>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046
# Contains "GSE129166", "GSE46474"
# Count the distinct gene symbols found across the two datasets. `[[` pulls
# the "Gene Symbol" column out as a plain vector. With `[` the operands were
# one-column data frames, and union() on data frames does not behave the same
# under every union() implementation that can be in scope (the same line has
# been recorded printing both 23521 and 54675), so the count was not
# reproducible. On plain vectors union() is unique(c(x, y)).
length(union(fData(geo_list[[1]])[["Gene Symbol"]], fData(geo_list[[5]])[["Gene Symbol"]]))
<<<<<<< HEAD
[1] 23521

All 3 have perfect overlap so the datasets recommended for use are “GSE129166”, “GSE34748”, and “GSE46474”

(Mukund): One more thought: we probably do want to go through the effort of cleaning another dataset to match these — firstly for the sake of increased difficulty in the eyes of Jean, but also because, without going through the process, it can be easy to overlook something in the data cleaning. I am a bit worried because everyone else I have talked to has told me the data cleaning for this is really hard, so I'm not sure whether we are actually missing something.

# Load the GSE34748 series matrix from the local Data/ directory.
GSE34748 = getGEO(filename="Data/GSE34748_series_matrix.txt.gz")
Using locally cached version of GPL570 found here:
/var/folders/81/4fc96fh52150fkrp33z0kklh0000gp/T//RtmpZiAUT6/GPL570.soft.gz 
# Keep the phenotype-data table (pData) of GSE34748 for inspection.
clinical_GSE34748 = pData(GSE34748)

Also, after some checking, some of these datasets don’t actually have clinical data.

# Load the GSE46474 series matrix from the local Data/ directory.
GSE46474 = getGEO(filename="Data/GSE46474_series_matrix.txt.gz")
Using locally cached version of GPL570 found here:
/var/folders/81/4fc96fh52150fkrp33z0kklh0000gp/T//RtmpZiAUT6/GPL570.soft.gz 
# Keep the phenotype-data table (pData) of GSE46474 for inspection.
clinical_GSE46474 = pData(GSE46474)

This one has clinical data!!

# Load the GSE129166 series matrix from the local Data/ directory.
GSE129166 = getGEO(filename="Data/GSE129166_series_matrix.txt.gz")
Using locally cached version of GPL570 found here:
/var/folders/81/4fc96fh52150fkrp33z0kklh0000gp/T//RtmpZiAUT6/GPL570.soft.gz 
# Keep the phenotype-data table (pData) of GSE129166 and open it in the
# interactive data viewer.
clinical_GSE129166 = pData(GSE129166)
View(clinical_GSE129166)

Also no clinical data

# NOTE(review): this repeats the GSE129166 load performed a few lines earlier
# with the same file name; it looks redundant — confirm before removing.
GSE129166 = getGEO(filename="Data/GSE129166_series_matrix.txt.gz")
Using locally cached version of GPL570 found here:
/var/folders/81/4fc96fh52150fkrp33z0kklh0000gp/T//RtmpZiAUT6/GPL570.soft.gz 

Mukund CPOP analysis

# Pairwise column differences of z1, converted to a matrix.
# NOTE(review): this step failed when the notebook was run — the error recorded
# directly below reports "vector memory exhausted" while evaluating it.
z1_pairwise = pairwise_col_diff(z1) %>% as.matrix()
Error in h(simpleError(msg, call)) : 
  error in evaluating the argument 'x' in selecting a method for function 'as.matrix': vector memory exhausted (limit reached?)

pre transformation plot

Boxplot to visualise if the arc transformations were good

# Summarise each arcsine-transformed pairwise-difference matrix with
# boxplot_tbl(), attach the matching dataset label column, and stack the three
# tables into a single plotting frame.
box1_arc <- cbind(boxplot_tbl(z1_arc, index = 1), GSE34748_id)
box2_arc <- cbind(boxplot_tbl(z2_arc, index = 1), GSE46474_id)
box3_arc <- cbind(boxplot_tbl(z3_arc, index = 1), GSE129166_id)
box4_arc <- rbind(box1_arc, box2_arc, box3_arc)

# One point (means) and one q1-q3 error bar per `object`, coloured by dataset.
# Axis ticks, x tick labels and the y-axis title are suppressed.
arcplot <- ggplot(box4_arc, aes(x = object, y = means)) +
  geom_point(aes(color = Dataset), size = 0.1) +
  geom_errorbar(aes(ymin = q1, ymax = q3, color = Dataset), size = 0.1, alpha = 0.2) +
  ggsci::scale_color_d3() +
  labs(title = "Arcsine transformation + pairwise difference", x = "Samples") +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    axis.title.y = element_blank(),
    plot.title = element_text(size = 10)
  )

arcplot

boxplot to see if the log transformation was good

# Summarise each log-transformed pairwise-difference matrix with
# boxplot_tbl(), attach the matching dataset label column, and stack the three
# tables into a single plotting frame.
box1_log <- cbind(boxplot_tbl(z1_log, index = 1), GSE34748_id)
box2_log <- cbind(boxplot_tbl(z2_log, index = 1), GSE46474_id)
box3_log <- cbind(boxplot_tbl(z3_log, index = 1), GSE129166_id)
box4_log <- rbind(box1_log, box2_log, box3_log)

# One point (means) and one q1-q3 error bar per `object`, coloured by dataset.
# Axis ticks, x tick labels and the y-axis title are suppressed.
logplot <- ggplot(box4_log, aes(x = object, y = means)) +
  geom_point(aes(color = Dataset), size = 0.1) +
  geom_errorbar(aes(ymin = q1, ymax = q3, color = Dataset), size = 0.1, alpha = 0.2) +
  ggsci::scale_color_d3() +
  labs(title = "Log transformation + pairwise difference", x = "Samples") +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    axis.title.y = element_blank(),
    plot.title = element_text(size = 10)
  )

logplot

getting the results vectors

LS0tCnRpdGxlOiAiTGlhbSBSZXNlYXJjaCAxIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciwgaW5jbHVkZT1GQUxTRX0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkodHVuZVIpCmxpYnJhcnkoZGV2dG9vbHMpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeSh0c2ZlYXR1cmVzKQpsaWJyYXJ5KGNsYXNzKQpsaWJyYXJ5KGN2VG9vbHMpCmxpYnJhcnkocmFuZG9tRm9yZXN0KQpsaWJyYXJ5KEdFT3F1ZXJ5KSAKbGlicmFyeShSLnV0aWxzKQpsaWJyYXJ5KHJlc2hhcGUyKQpsaWJyYXJ5KGxpbW1hKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KGUxMDcxKQpsaWJyYXJ5KERUKQpsaWJyYXJ5KHZpcmlkaXMpCmxpYnJhcnkocGxvdGx5KQpsaWJyYXJ5KHNjYWxlcykKbGlicmFyeShDUE9QKQpsaWJyYXJ5KG1hdHJpeFN0YXRzKQpgYGAKCmBgYHtyfQpnZW9fbmFtZV9saXN0ID0gYygiR1NFMTI5MTY2IiwgIkdTRTM0NzQ4IiwgIkdTRTUxNjc1IiwgIkdTRTE1Mjk2IiwgIkdTRTQ2NDc0IiwgIkdTRTUwMDg0IikKZ2VvX2xpc3QgPSBjKGdldEdFTyhmaWxlbmFtZT0iRGF0YS9HU0UxMjkxNjZfc2VyaWVzX21hdHJpeC50eHQuZ3oiKSwgZ2V0R0VPKGZpbGVuYW1lPSJEYXRhL0dTRTM0NzQ4X3Nlcmllc19tYXRyaXgudHh0Lmd6IiksIGdldEdFTyhmaWxlbmFtZT0iRGF0YS9HU0U1MTY3NV9zZXJpZXNfbWF0cml4LnR4dC5neiIpLCBnZXRHRU8oZmlsZW5hbWU9IkRhdGEvR1NFMTUyOTZfc2VyaWVzX21hdHJpeC50eHQuZ3oiKSwgZ2V0R0VPKGZpbGVuYW1lPSJEYXRhL0dTRTQ2NDc0X3Nlcmllc19tYXRyaXgudHh0Lmd6IiksIGdldEdFTyhmaWxlbmFtZT0iRGF0YS9HU0U1MDA4NF9zZXJpZXNfbWF0cml4LnR4dC5neiIpKQpgYGAKCmBgYHtyfQpmb3IoZ2VvIGluIGdlb19saXN0KSB7CiAgcHJpbnQoZkRhdGEoZ2VvKSkKICBwcmludChuYW1lcyh3aGljaChjb2xTdW1zKGlzLm5hKGZEYXRhKGdlbykpKT4wKSkpCn0KYGBgCkFsbCBnc2UgaGF2ZSBhIGdlbmUgc3ltYm9sIGNvbHVtbiB3aGljaCBjYW4gYmUgdXNlZCB0byBtYXRjaCByZWNvcmRzIGV4Y2VwdCBmb3IgdGhlIGxhc3Qgb25lLCBHU0U1MDA4NCB3aGljaCBoYXMgdGhlIGdlbmUgYXMgdGhlIDJuZCBsaXN0ZWQgZWxlbWVudCB1bmRlciBnZW5lX2Fzc2lnbm1lbnQsIGFuZCBHU0U1MTY3NSB3aGljaCBpcyBtaXNzaW5nIGFsbCBvZiBpdHMgZ2VuZSBpbmZvLiAKCgpgYGB7cn0KZm9yKGdlbyBpbiBnZW9fbGlzdCkgewogIHByaW50KHBEYXRhKGdlbykpCiAgcHJpbnQobmFtZXMod2hpY2goY29sU3Vtcyhpcy5uYShwRGF0YShnZW8pKSk+MCkpKQp9CmBgYAoKYGBge3J9CmZvciAoZ2VvIGluIGdlb19saXN0KSB7CiAgcHJpbnQoZGF0YS5mcmFtZSh0KGV4cHJzKGdlbykpKSkKICBwcmludChuYW1lcyh3aGljaChjb2xTdW1zKGlzLm5hKGRhdGEuZnJhbWUodChleHBycyhnZW8pKSkpKT4wKSkpCn0KYGBgCgpgYGB7cn0KZm9yIChnZW8gaW4gZ2VvX2xpc3QpIHsKICBkZiA9IGRhdGEuZnJhbWUo
dChleHBycyhnZW8pKSkKICBib3hwbG90KGRmWzE6MTAwXSkKfQpgYGAKUGxvdHMgZm9yIEdTRTUxNjc1IGFuZCBHU0UxNTI5NiBzaG93IHRoYXQgdGhleSBoYXZlIGJlZW4gdHJhbnNmb3JtZWQgc29tZWhvdy4gT3RoZXIgdGhhbiB0aG9zZSwgdGhlIG90aGVyIDQgc2VlbSBmYWlybHkgY29udGFpbmVkIGJldHdlZW4gMC0xNCBleHByZXNzaW9uIGludGVuc2l0eS4KCkdTRTUxNjc1IGhhcyBiZWVuIGRpc2NhcmRlZCBkdWUgdG8gc21hbGwgc2FtcGxlIHNpemUsIG1pc3NpbmcgaW5mbywgYW5kIHRyYW5zZm9ybWVkIGV4cHJlc3Npb24gdmFsdWVzLiBHU0U1MDA4NCBoYXMgYmVlbiBkaXNjYXJkZWQKCgoKYGBge3J9CiMgQ29udGFpbnMgIkdTRTEyOTE2NiIsICJHU0UzNDc0OCIKbGVuZ3RoKHVuaW9uKGZEYXRhKGdlb19saXN0W1sxXV0pWyJHZW5lIFN5bWJvbCJdLCBmRGF0YShnZW9fbGlzdFtbMl1dKVsiR2VuZSBTeW1ib2wiXSlbWzFdXSkKYGBgCgpgYGB7cn0KIyBDb250YWlucyAiR1NFMTI5MTY2IiwgIkdTRTQ2NDc0IgpsZW5ndGgodW5pb24oZkRhdGEoZ2VvX2xpc3RbWzFdXSlbIkdlbmUgU3ltYm9sIl0sIGZEYXRhKGdlb19saXN0W1s1XV0pWyJHZW5lIFN5bWJvbCJdKVtbMV1dKQpgYGAKCkFsbCAzIGhhdmUgcGVyZmVjdCBvdmVybGFwIHNvIHRoZSBkYXRhc2V0cyByZWNvbW1lbmRlZCBmb3IgdXNlIGFyZSAiR1NFMTI5MTY2IiwgIkdTRTM0NzQ4IiwgYW5kICJHU0U0NjQ3NCIKCihNdWt1bmQpOiBBbHNvIGp1c3QgdGhpbmtpbmcsIGJ1dCB3ZSBwcm9iYWJseSBkbyB3YW50IHRvIGdvIHRocm91Z2ggdGhlIGVmZm9ydCBvZiB0cnlpbmcgdG8gY2xlYW4gYW5vdGhlciBkYXRhc2V0IHRvIG1hdGNoIHRoZXNlLiBmaXJzdGx5IGZvciB0aGUgc2FrZSBvZiBpbmNyZWFzZWQgZGlmZmljdWx0eSBpbiB0aGUgZXllcyBvZiBKZWFuLCBidXQgYWxzbyBzb21ldGltZXMgd2l0aG91dCBnb2luZyB0aHJvdWdoIHRoZSBwcm9jZXNzIGl0IGNhbiBiZSBlYXN5IHRvIG92ZXJsb29rIHNvbWV0aGluZyBpbiB0aGUgZGF0YSBjbGVhbmluZyBhc3BlY3QuIEkgYW0ganVzdCBhIGJpdCB3b3JyaWVkIGJlY2F1c2UgYWZ0ZXIgdGFsa2luZyB0byBzb21lIG90aGVyIHBlb3BsZSB0aGV5IGhhdmUgYWxsIHRvbGQgbWUgdGhlIGRhdGEgY2xlYW5pbmcgZm9yIHRoaXMgaXMgcmVhbGx5IGhhcmQgc28gbm90IHN1cmUgaWYgd2UgYXJlIGFjdHVhbGx5IG1pc3Npbmcgc29tZXRoaW5nLiAKCmBgYHtyfQpHU0UzNDc0OCA9IGdldEdFTyhmaWxlbmFtZT0iRGF0YS9HU0UzNDc0OF9zZXJpZXNfbWF0cml4LnR4dC5neiIpCmNsaW5pY2FsX0dTRTM0NzQ4ID0gcERhdGEoR1NFMzQ3NDgpCgpgYGAKQWxzbyBqdXN0IGRvaW5nIHNvbWUgY2hlY2tpbmcgYW5kIHNvbWUgb2YgdGhlc2UgZG9uJ3QgYWN0dWFsbHkgaGF2ZSBjbGluaWNhbCBkYXRhCgpgYGB7cn0KR1NFNDY0NzQgPSBnZXRHRU8oZmlsZW5hbWU9IkRhdGEvR1NFNDY0NzRfc2VyaWVzX21hdHJpeC50eHQuZ3oiKQpjbGlu
aWNhbF9HU0U0NjQ3NCA9IHBEYXRhKEdTRTQ2NDc0KQoKYGBgClRoaXMgb25lIGhhcyBjbGluaWNhbCBkYXRhISEKYGBge3J9CkdTRTEyOTE2NiA9IGdldEdFTyhmaWxlbmFtZT0iRGF0YS9HU0UxMjkxNjZfc2VyaWVzX21hdHJpeC50eHQuZ3oiKQpjbGluaWNhbF9HU0UxMjkxNjYgPSBwRGF0YShHU0UxMjkxNjYpCgpgYGAKQWxzbyBubyBjbGluaWNhbCBkYXRhCmBgYHtyfQojR1NFMTI5MTY2ID0gZ2V0R0VPKGZpbGVuYW1lPSJEYXRhL0dTRTEyOTE2Nl9zZXJpZXNfbWF0cml4LnR4dC5neiIpCiNjbGluaWNhbF9HU0UxMjkxNjYgPSBwRGF0YShHU0UxMjkxNjYpCgpgYGAKCiMjIE11a3VuZCBDUE9QIGFuYWx5c2lzIApgYGB7cn0KI0NQT1AgZGF0YQoKIyMga2VlcGluZyBvbmx5IHRoZSAxMDAgbW9zdCB2YXJpYWJsZSBnZW5lcyBpbiBteSBkYXRhIGZyYW1lIApleHBfR1NFMzQ3NDggPSAoZXhwcnMoR1NFMzQ3NDgpKQpWYXJpYW5jZSA9IHJvd1ZhcnMoYXMubWF0cml4KGV4cF9HU0UzNDc0OCkpClZhcmlhbmNlID0gYXMuZGF0YS5mcmFtZShWYXJpYW5jZSkKZXhwX0dTRTM0NzQ4ID0gYXMuZGF0YS5mcmFtZShleHBfR1NFMzQ3NDgpCmV4cF9HU0UzNDc0OCA9IGNiaW5kKGV4cF9HU0UzNDc0OCwgdmFyaWFuY2UgPSBWYXJpYW5jZSkKZXhwX0dTRTM0NzQ4ID0gc2xpY2VfbWF4KGV4cF9HU0UzNDc0OCwgb3JkZXJfYnkgPSBWYXJpYW5jZSwgbiA9IDIwMDApCmV4cF9HU0UzNDc0OCA9IHN1YnNldChleHBfR1NFMzQ3NDgsIHNlbGVjdCA9IC1jKFZhcmlhbmNlKSkKcm93X25hbWVzX2V4cF9HU0UzNDc0OCA9IHJvd25hbWVzKGV4cF9HU0UzNDc0OCkKCgpleHBfR1NFNDY0NzQgPSAoZXhwcnMoR1NFNDY0NzQpKQpWYXJpYW5jZSA9IHJvd1ZhcnMoYXMubWF0cml4KGV4cF9HU0U0NjQ3NCkpClZhcmlhbmNlID0gYXMuZGF0YS5mcmFtZShWYXJpYW5jZSkKZXhwX0dTRTQ2NDc0ID0gYXMuZGF0YS5mcmFtZShleHBfR1NFNDY0NzQpCmV4cF9HU0U0NjQ3NCA9IGNiaW5kKGV4cF9HU0U0NjQ3NCwgdmFyaWFuY2UgPSBWYXJpYW5jZSkKZXhwX0dTRTQ2NDc0ID0gc2xpY2VfbWF4KGV4cF9HU0U0NjQ3NCwgb3JkZXJfYnkgPSBWYXJpYW5jZSwgbiA9IDIwMDApCmV4cF9HU0U0NjQ3NCA9IHN1YnNldChleHBfR1NFNDY0NzQsIHNlbGVjdCA9IC1jKFZhcmlhbmNlKSkKcm93X25hbWVzX2V4cF9HU0U0NjQ3NCA9IHJvd25hbWVzKGV4cF9HU0U0NjQ3NCkKCgpleHBfR1NFMTI5MTY2ID0gKGV4cHJzKEdTRTEyOTE2NikpClZhcmlhbmNlID0gcm93VmFycyhhcy5tYXRyaXgoZXhwX0dTRTEyOTE2NikpClZhcmlhbmNlID0gYXMuZGF0YS5mcmFtZShWYXJpYW5jZSkKZXhwX0dTRTEyOTE2NiA9IGFzLmRhdGEuZnJhbWUoZXhwX0dTRTEyOTE2NikKZXhwX0dTRTEyOTE2NiA9IGNiaW5kKGV4cF9HU0UxMjkxNjYsIHZhcmlhbmNlID0gVmFyaWFuY2UpCmV4cF9HU0UxMjkxNjYgPSBzbGljZV9tYXgoZXhwX0dTRTEyOTE2Niwgb3JkZXJfYnkgPSBWYXJpYW5jZSwgbiA9IDIwMDAp
CmV4cF9HU0UxMjkxNjYgPSBzdWJzZXQoZXhwX0dTRTEyOTE2Niwgc2VsZWN0ID0gLWMoVmFyaWFuY2UpKQpyb3dfbmFtZXNfZXhwX0dTRTEyOTE2NiA9IHJvd25hbWVzKGV4cF9HU0UxMjkxNjYpCgoKaW50ZXJzZWN0aW9uID0gaW50ZXJzZWN0KHJvd19uYW1lc19leHBfR1NFMzQ3NDgsIHJvd19uYW1lc19leHBfR1NFNDY0NzQpCmludGVyc2VjdGlvbiA9IGludGVyc2VjdChpbnRlcnNlY3Rpb24sIHJvd19uYW1lc19leHBfR1NFMTI5MTY2KQoKZXhwX0dTRTM0NzQ4ID0gYXMuZGF0YS5mcmFtZSh0KGFzLm1hdHJpeChleHBfR1NFMzQ3NDgpKSkKZXhwX0dTRTM0NzQ4ID0gc3Vic2V0KGV4cF9HU0UzNDc0OCwgc2VsZWN0ID0gYyhpbnRlcnNlY3Rpb24pKQoKZXhwX0dTRTQ2NDc0ID0gYXMuZGF0YS5mcmFtZSh0KGFzLm1hdHJpeChleHBfR1NFNDY0NzQpKSkKZXhwX0dTRTQ2NDc0ID0gc3Vic2V0KGV4cF9HU0U0NjQ3NCwgc2VsZWN0ID0gYyhpbnRlcnNlY3Rpb24pKQoKZXhwX0dTRTEyOTE2NiA9IGFzLmRhdGEuZnJhbWUodChhcy5tYXRyaXgoZXhwX0dTRTEyOTE2NikpKQpleHBfR1NFMTI5MTY2ID0gc3Vic2V0KGV4cF9HU0UxMjkxNjYsIHNlbGVjdCA9IGMoaW50ZXJzZWN0aW9uKSkKCkdTRTM0NzQ4X2lkIDwtIGRhdGEuZnJhbWUoIkRhdGFzZXQiID0gcmVwKCJHU0UzNDc0OCIsbnJvdyhleHBfR1NFMzQ3NDgpKSkKR1NFNDY0NzRfaWQgPC0gZGF0YS5mcmFtZSgiRGF0YXNldCIgPSByZXAoIkdTRTQ2NDc0Iixucm93KGV4cF9HU0U0NjQ3NCkpKQpHU0UxMjkxNjZfaWQgPC0gZGF0YS5mcmFtZSgiRGF0YXNldCIgPSByZXAoIkdTRTEyOTE2NiIsbnJvdyhleHBfR1NFMTI5MTY2KSkpCgp6MSA9IGV4cF9HU0UzNDc0OCAlPiUgYXMubWF0cml4KCkKejIgPSBleHBfR1NFNDY0NzQgJT4lIGFzLm1hdHJpeCgpCnozID0gZXhwX0dTRTEyOTE2NiAlPiUgYXMubWF0cml4KCkKCiMjIGFyY3NpbmUgdHJhbnNmb3JtYXRpb24KCmV4cF9HU0UzNDc0OF9hcmMgPC0gZXhwX0dTRTM0NzQ4CmV4cF9HU0UzNDc0OF9hcmMgPSBleHBfR1NFMzQ3NDhfYXJjIC8gbWF4KGV4cF9HU0UzNDc0OF9hcmMpCmV4cF9HU0UzNDc0OF9hcmMgPSBhc2luKHNxcnQoZXhwX0dTRTM0NzQ4X2FyYykpCgpleHBfR1NFNDY0NzRfYXJjIDwtIGV4cF9HU0U0NjQ3NApleHBfR1NFNDY0NzRfYXJjID0gZXhwX0dTRTQ2NDc0X2FyYyAvIG1heChleHBfR1NFNDY0NzRfYXJjKQpleHBfR1NFNDY0NzRfYXJjID0gYXNpbihzcXJ0KGV4cF9HU0U0NjQ3NF9hcmMpKQoKZXhwX0dTRTEyOTE2Nl9hcmMgPC0gZXhwX0dTRTEyOTE2NgpleHBfR1NFMTI5MTY2X2FyYyA9IGV4cF9HU0UxMjkxNjZfYXJjIC8gbWF4KGV4cF9HU0UxMjkxNjZfYXJjKQpleHBfR1NFMTI5MTY2X2FyYyA9IGFzaW4oc3FydChleHBfR1NFMTI5MTY2X2FyYykpCgp6MV9wYWlyd2lzZSA9IHBhaXJ3aXNlX2NvbF9kaWZmKHoxKSAlPiUgYXMubWF0cml4KCkKejJfcGFpcndpc2UgPSBwYWlyd2lzZV9jb2xfZGlmZih6MikgJT4lIGFz
Lm1hdHJpeCgpCnozX3BhaXJ3aXNlID0gcGFpcndpc2VfY29sX2RpZmYoejMpICU+JSBhcy5tYXRyaXgoKQoKCnoxX2FyYyA9IHBhaXJ3aXNlX2NvbF9kaWZmKGV4cF9HU0UzNDc0OF9hcmMpICU+JSBhcy5tYXRyaXgoKQp6Ml9hcmMgPSBwYWlyd2lzZV9jb2xfZGlmZihleHBfR1NFNDY0NzRfYXJjKSAlPiUgYXMubWF0cml4KCkKejNfYXJjID0gcGFpcndpc2VfY29sX2RpZmYoZXhwX0dTRTEyOTE2Nl9hcmMpICU+JSBhcy5tYXRyaXgoKQoKIyMgbG9nIHRyYW5zZm9ybQoKZXhwX0dTRTM0NzQ4X2xvZyA8LSBleHBfR1NFMzQ3NDgKZXhwX0dTRTM0NzQ4X2xvZyA9IGV4cF9HU0UzNDc0OF9sb2cgKyAxCmV4cF9HU0UzNDc0OF9sb2cgPSBsb2coZXhwX0dTRTM0NzQ4X2xvZykKCmV4cF9HU0U0NjQ3NF9sb2cgPC0gZXhwX0dTRTQ2NDc0CmV4cF9HU0U0NjQ3NF9sb2cgPSBleHBfR1NFNDY0NzRfbG9nICsgMQpleHBfR1NFNDY0NzRfbG9nID0gbG9nKGV4cF9HU0U0NjQ3NF9sb2cpCgpleHBfR1NFMTI5MTY2X2xvZyA8LSBleHBfR1NFMTI5MTY2CmV4cF9HU0UxMjkxNjZfbG9nID0gZXhwX0dTRTEyOTE2Nl9sb2cgKyAxCmV4cF9HU0UxMjkxNjZfbG9nID0gbG9nKGV4cF9HU0UxMjkxNjZfbG9nKQoKejFfbG9nID0gcGFpcndpc2VfY29sX2RpZmYoZXhwX0dTRTM0NzQ4X2xvZykgJT4lIGFzLm1hdHJpeCgpCnoyX2xvZyA9IHBhaXJ3aXNlX2NvbF9kaWZmKGV4cF9HU0U0NjQ3NF9sb2cpICU+JSBhcy5tYXRyaXgoKQp6M19sb2cgPSBwYWlyd2lzZV9jb2xfZGlmZihleHBfR1NFMTI5MTY2X2xvZykgJT4lIGFzLm1hdHJpeCgpCgoKYGBgCgojIyBwcmUgdHJhbnNmb3JtYXRpb24gcGxvdApgYGB7cn0KYm94MTEgPSBjYmluZChib3hwbG90X3RibCh6MSwgaW5kZXggPSAxKSwgR1NFMzQ3NDhfaWQpCmJveDIyID0gY2JpbmQoYm94cGxvdF90YmwoejIsIGluZGV4ID0gMSksIEdTRTQ2NDc0X2lkKQpib3gzMyA9IGNiaW5kKGJveHBsb3RfdGJsKHozLCBpbmRleCA9IDEpLCBHU0UxMjkxNjZfaWQpCmJveDQgPSByYmluZChib3gxMSwgYm94MjIsIGJveDMzKQoKZXhwcmVzc2lvbnBsb3QgPC0KZ2dwbG90KGRhdGEgPSBib3g0LCBhZXMoeCA9IG9iamVjdCwgeSA9IG1lYW5zKSkgKwogIGdlb21fcG9pbnQoYWVzKGNvbG9yID0gRGF0YXNldCksIHNpemUgPSAwLjEpICsKICBnZW9tX2Vycm9yYmFyKGFlcyh5bWluID0gcTEsCiAgICAgICAgICAgICAgICAgICAgeW1heCA9IHEzLAogICAgICAgICAgICAgICAgICAgIGNvbG9yID0gRGF0YXNldCksIHNpemUgPSAwLjEsICBhbHBoYSA9IDAuMikgKwogIGdnc2NpOjpzY2FsZV9jb2xvcl9kMygpICsKICB0aGVtZShheGlzLnRpY2tzID0gZWxlbWVudF9ibGFuaygpKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X2JsYW5rKCkpICsKICB0aGVtZShheGlzLnRpdGxlLng9ZWxlbWVudF9ibGFuaygpKSArCiAgdGhlbWUoYXhpcy50aXRsZS55PWVsZW1lbnRfYmxhbmsoKSkgKwogIHlsaW0oMCwxNSkgKyAKICB0aGVt
ZShsZWdlbmQucG9zaXRpb249ImJvdHRvbSIpICsKICB0aGVtZShsZWdlbmQudGl0bGUgPSBlbGVtZW50X2JsYW5rKCkpICsKICBsYWJzKHRpdGxlID0gIlJhdyBEYXRhIikgKwogIHRoZW1lKHBsb3QudGl0bGUgPSBlbGVtZW50X3RleHQoc2l6ZT0xMCkpCgpleHByZXNzaW9ucGxvdApgYGAKCiMjIEJveHBsb3QgdG8gdmlzdWFsaXNlIGlmIHRoZSBhcmMgdHJhbnNmb3JtYXRpb25zIHdlcmUgZ29vZApgYGB7cn0KYm94MV9hcmMgPSBjYmluZChib3hwbG90X3RibCh6MV9hcmMsIGluZGV4ID0gMSksIEdTRTM0NzQ4X2lkKQpib3gyX2FyYyA9IGNiaW5kKGJveHBsb3RfdGJsKHoyX2FyYywgaW5kZXggPSAxKSwgR1NFNDY0NzRfaWQpCmJveDNfYXJjID0gY2JpbmQoYm94cGxvdF90YmwoejNfYXJjLCBpbmRleCA9IDEpLCBHU0UxMjkxNjZfaWQpCmJveDRfYXJjID0gcmJpbmQoYm94MV9hcmMsIGJveDJfYXJjLCBib3gzX2FyYykKCmFyY3Bsb3QgPC0KZ2dwbG90KGRhdGEgPSBib3g0X2FyYywgYWVzKHggPSBvYmplY3QsIHkgPSBtZWFucykpICsKICBnZW9tX3BvaW50KGFlcyhjb2xvciA9IERhdGFzZXQpLCBzaXplID0gMC4xKSArCiAgZ2VvbV9lcnJvcmJhcihhZXMoeW1pbiA9IHExLAogICAgICAgICAgICAgICAgICAgIHltYXggPSBxMywKICAgICAgICAgICAgICAgICAgICBjb2xvciA9IERhdGFzZXQpLCBzaXplID0gMC4xLCAgYWxwaGEgPSAwLjIpICsKICBnZ3NjaTo6c2NhbGVfY29sb3JfZDMoKSArCiAgdGhlbWUoYXhpcy50aWNrcyA9IGVsZW1lbnRfYmxhbmsoKSkgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF9ibGFuaygpKSArCiAgeGxhYigiU2FtcGxlcyIpICsKICB0aGVtZShheGlzLnRpdGxlLnk9ZWxlbWVudF9ibGFuaygpKSArCiAgbGFicyh0aXRsZSA9ICJBcmNzaW5lIHRyYW5zZm9ybWF0aW9uICsgcGFpcndpc2UgZGlmZmVyZW5jZSIpICsKICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KHNpemU9MTApKQoKYXJjcGxvdApgYGAKCiMjIGJveHBsb3QgdG8gc2VlIGlmIHRoZSBsb2cgdHJhbnNmb3JtYXRpb24gd2FzIGdvb2QKYGBge3J9CmJveDFfbG9nID0gY2JpbmQoYm94cGxvdF90YmwoejFfbG9nLCBpbmRleCA9IDEpLCBHU0UzNDc0OF9pZCkKYm94Ml9sb2cgPSBjYmluZChib3hwbG90X3RibCh6Ml9sb2csIGluZGV4ID0gMSksIEdTRTQ2NDc0X2lkKQpib3gzX2xvZyA9IGNiaW5kKGJveHBsb3RfdGJsKHozX2xvZywgaW5kZXggPSAxKSwgR1NFMTI5MTY2X2lkKQpib3g0X2xvZyA9IHJiaW5kKGJveDFfbG9nLCBib3gyX2xvZywgYm94M19sb2cpCgpsb2dwbG90IDwtCmdncGxvdChkYXRhID0gYm94NF9sb2csIGFlcyh4ID0gb2JqZWN0LCB5ID0gbWVhbnMpKSArCiAgZ2VvbV9wb2ludChhZXMoY29sb3IgPSBEYXRhc2V0KSwgc2l6ZSA9IDAuMSkgKwogIGdlb21fZXJyb3JiYXIoYWVzKHltaW4gPSBxMSwKICAgICAgICAgICAgICAgICAgICB5bWF4ID0gcTMsCiAgICAgICAgICAgICAgICAgICAgY29sb3Ig
PSBEYXRhc2V0KSwgc2l6ZSA9IDAuMSwgIGFscGhhID0gMC4yKSArCiAgZ2dzY2k6OnNjYWxlX2NvbG9yX2QzKCkgKwogIHRoZW1lKGF4aXMudGlja3MgPSBlbGVtZW50X2JsYW5rKCkpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfYmxhbmsoKSkgKwogIHhsYWIoIlNhbXBsZXMiKSArCiAgdGhlbWUoYXhpcy50aXRsZS55PWVsZW1lbnRfYmxhbmsoKSkgKwogIGxhYnModGl0bGUgPSAiTG9nIHRyYW5zZm9ybWF0aW9uICsgcGFpcndpc2UgZGlmZmVyZW5jZSIpICsKICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KHNpemU9MTApKQoKbG9ncGxvdApgYGAKIyMgZ2V0dGluZyB0aGUgcmVzdWx0cyB2ZWN0b3JzIApgYGB7cn0KcERhdGEoR1NFNDY0NzQpCnBEYXRhKEdTRTEyOTE2NikgI2lzIHRoZXJlIHJlamVjdGlvbiBhbmQgc3RhYmxlCnBEYXRhKEdTRTM0NzQ4KSAjbm8gcmVqZWN0IG9yIHN0YWJsZQoKCgojIyMgR1NFMzYwNTkKIyMjIEdTRTQ4NTgxCiMgdGhlc2UgaGF2ZSByZWplY3QgKyBzdGFibGUgYnV0IGNhdGVnb3JpemVkIGluIG1vcmUgZGV0YWlsIC0+IGVpdGhlciBoYXZlIG1vcmUgZ3JvdXBzIHRoYXQgd2UgYXJlIHByZWRpY3RpbmcsIG9yIHdlIGNvdWxkIGRvIHB1cmVseSBiaW5hcnkgCmBgYAoK
=======
[1] 54675

All 3 have perfect overlap so the datasets recommended for use are “GSE129166”, “GSE34748”, and “GSE46474”

# Count the distinct gene symbols found across the first and fourth datasets.
# `[[` pulls the "Gene Symbol" column out as a plain vector. With `[` the
# operands were one-column data frames, and union() on data frames does not
# behave the same under every union() implementation that can be in scope, so
# the count was not reproducible. On plain vectors union() is unique(c(x, y)).
length(union(fData(geo_list[[1]])[["Gene Symbol"]], fData(geo_list[[4]])[["Gene Symbol"]]))
[1] 54675
LS0tDQp0aXRsZTogIkxpYW0gUmVzZWFyY2ggMSINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCmBgYHtyLCBpbmNsdWRlPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHR1bmVSKQ0KbGlicmFyeShkZXZ0b29scykNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkodHNmZWF0dXJlcykNCmxpYnJhcnkoY2xhc3MpDQpsaWJyYXJ5KGN2VG9vbHMpDQpsaWJyYXJ5KHJhbmRvbUZvcmVzdCkNCmxpYnJhcnkoR0VPcXVlcnkpIA0KbGlicmFyeShSLnV0aWxzKQ0KbGlicmFyeShyZXNoYXBlMikNCmxpYnJhcnkobGltbWEpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShlMTA3MSkNCmxpYnJhcnkoRFQpDQpsaWJyYXJ5KHZpcmlkaXMpDQpsaWJyYXJ5KHBsb3RseSkNCmxpYnJhcnkoc2NhbGVzKQ0KYGBgDQoNCmBgYHtyfQ0KZ2VvX25hbWVfbGlzdCA9IGMoIkdTRTEyOTE2NiIsICJHU0UzNDc0OCIsICJHU0U1MTY3NSIsICJHU0UxNTI5NiIsICJHU0U0NjQ3NCIsICJHU0U1MDA4NCIpDQpnZW9fbGlzdCA9IGMoZ2V0R0VPKGZpbGVuYW1lPSJEYXRhL0dTRTEyOTE2Nl9zZXJpZXNfbWF0cml4LnR4dC5neiIpLCBnZXRHRU8oZmlsZW5hbWU9IkRhdGEvR1NFMzQ3NDhfc2VyaWVzX21hdHJpeC50eHQuZ3oiKSwgZ2V0R0VPKGZpbGVuYW1lPSJEYXRhL0dTRTUxNjc1X3Nlcmllc19tYXRyaXgudHh0Lmd6IiksIGdldEdFTyhmaWxlbmFtZT0iRGF0YS9HU0UxNTI5Nl9zZXJpZXNfbWF0cml4LnR4dC5neiIpLCBnZXRHRU8oZmlsZW5hbWU9IkRhdGEvR1NFNDY0NzRfc2VyaWVzX21hdHJpeC50eHQuZ3oiKSwgZ2V0R0VPKGZpbGVuYW1lPSJEYXRhL0dTRTUwMDg0X3Nlcmllc19tYXRyaXgudHh0Lmd6IikpDQpgYGANCg0KYGBge3J9DQpmb3IoZ2VvIGluIGdlb19saXN0KSB7DQogIHByaW50KGZEYXRhKGdlbykpDQogIHByaW50KG5hbWVzKHdoaWNoKGNvbFN1bXMoaXMubmEoZkRhdGEoZ2VvKSkpPjApKSkNCn0NCmBgYA0KQWxsIGdzZSBoYXZlIGEgZ2VuZSBzeW1ib2wgY29sdW1uIHdoaWNoIGNhbiBiZSB1c2VkIHRvIG1hdGNoIHJlY29yZHMgZXhjZXB0IGZvciB0aGUgbGFzdCBvbmUsIEdTRTUwMDg0IHdoaWNoIGhhcyB0aGUgZ2VuZSBhcyB0aGUgMm5kIGxpc3RlZCBlbGVtZW50IHVuZGVyIGdlbmVfYXNzaWdubWVudCwgYW5kIEdTRTUxNjc1IHdoaWNoIGlzIG1pc3NpbmcgYWxsIG9mIGl0cyBnZW5lIGluZm8uIA0KDQoNCmBgYHtyfQ0KZm9yKGdlbyBpbiBnZW9fbGlzdCkgew0KICBwcmludChwRGF0YShnZW8pKQ0KICBwcmludChuYW1lcyh3aGljaChjb2xTdW1zKGlzLm5hKHBEYXRhKGdlbykpKT4wKSkpDQp9DQpgYGANCg0KYGBge3J9DQpmb3IgKGdlbyBpbiBnZW9fbGlzdCkgew0KICBwcmludChkYXRhLmZyYW1lKHQoZXhwcnMoZ2VvKSkpKQ0KICBwcmludChuYW1lcyh3aGljaChjb2xTdW1zKGlzLm5hKGRhdGEuZnJhbWUodChleHBycyhnZW8pKSkpKT4wKSkpDQp9DQpgYGANCg0KYGBge3J9DQojIFRPRE86IEZpbmQgdGhlIG91dGxp
ZXIgZ2VuZXMgYW5kIGNvbXBhcmUNCmZvciAoZ2VvIGluIGdlb19saXN0KSB7DQogIGRmID0gZGF0YS5mcmFtZSh0KGV4cHJzKGdlbykpKQ0KICBib3hwbG90KGRmWzE6MTAwXSkNCn0NCmBgYA0KUGxvdHMgZm9yIEdTRTUxNjc1IGFuZCBHU0UxNTI5NiBzaG93IHRoYXQgdGhleSBoYXZlIGJlZW4gdHJhbnNmb3JtZWQgc29tZWhvdy4gT3RoZXIgdGhhbiB0aG9zZSwgdGhlIG90aGVyIDQgc2VlbSBmYWlybHkgY29udGFpbmVkIGJldHdlZW4gMC0xNCBleHByZXNzaW9uIGludGVuc2l0eS4NCg0KR1NFNTE2NzUgaGFzIGJlZW4gZGlzY2FyZGVkIGR1ZSB0byBzbWFsbCBzYW1wbGUgc2l6ZSwgbWlzc2luZyBpbmZvLCBhbmQgdHJhbnNmb3JtZWQgZXhwcmVzc2lvbiB2YWx1ZXMuIEdTRTUwMDg0IGhhcyBiZWVuIGRpc2NhcmRlZA0KDQoNCg0KYGBge3J9DQojIENvbnRhaW5zICJHU0UxMjkxNjYiLCAiR1NFMzQ3NDgiDQpsZW5ndGgodW5pb24oZkRhdGEoZ2VvX2xpc3RbWzFdXSlbIkdlbmUgU3ltYm9sIl0sIGZEYXRhKGdlb19saXN0W1syXV0pWyJHZW5lIFN5bWJvbCJdKVtbMV1dKQ0KYGBgDQoNCmBgYHtyfQ0KIyBDb250YWlucyAiR1NFMTI5MTY2IiwgIkdTRTQ2NDc0Ig0KbGVuZ3RoKHVuaW9uKGZEYXRhKGdlb19saXN0W1sxXV0pWyJHZW5lIFN5bWJvbCJdLCBmRGF0YShnZW9fbGlzdFtbNV1dKVsiR2VuZSBTeW1ib2wiXSlbWzFdXSkNCmBgYA0KDQpBbGwgMyBoYXZlIHBlcmZlY3Qgb3ZlcmxhcCBzbyB0aGUgZGF0YXNldHMgcmVjb21tZW5kZWQgZm9yIHVzZSBhcmUgIkdTRTEyOTE2NiIsICJHU0UzNDc0OCIsIGFuZCAiR1NFNDY0NzQiDQoNCmBgYHtyfQ0KbGVuZ3RoKHVuaW9uKGZEYXRhKGdlb19saXN0W1sxXV0pWyJHZW5lIFN5bWJvbCJdLCBmRGF0YShnZW9fbGlzdFtbNF1dKVsiR2VuZSBTeW1ib2wiXSlbWzFdXSkNCmBgYA0KDQo=
>>>>>>> 2f1035ad74f18d20708647ab05d71ebd8e7fe046